
lagging <- function(x) {
  sortx <- x[order(x$year),]
  relevant <- data.frame(year = sortx$year,
                         lag_ln_GDPpc = sortx$ln_GDPpc,
                         lag_GDPpc = sortx$GDPpc,
                         lag_ln_Pop = sortx$ln_Pop,
                         lag_Growth = sortx$Growth, lag_Gini = sortx$Gini,
                         lag_Democ = sortx$Democ, lag_Autoc = sortx$Autoc,
                         lag_Polity = sortx$Polity,
                         lag_IMR = sortx$IMR,
                         lag_NHI = sortx$NHI, lag_PHI = sortx$PHI, lag_LDG = sortx$LDG,
                         lag_Conf_Same = sortx$Conf_Same,
                         lag_Conf_Neighb_As = sortx$Conf_Neighb_As,
                         lag_Conf_Neighb = sortx$Conf_Neighb, lag_Instab = sortx$Instab,
                         lag_Rent = sortx$Rent, lag_ln_Rent = sortx$ln_Rent)
  relevant$year <- relevant$year + 1
  lagged <- merge(sortx, relevant, by = c("year"), all.x = T)
  return(lagged)
}

### A function to construct a dataframe for specific analysis
## If "preact_def" is 1, former rebels are defined on the last two years of the previous conflict. Otherwise, the last year.
## If "recast_def" is 1, conflict recurrence involving both former and new rebels will be coded as recast conlfict. Otherwise, repeated.
## If "RV" is 1, the dataset will include cases of rebel victories
## If "both" is 1, conflict recurrence with both former and new rebels will be kept as an independent category of events.
arrangement <- function(dat, preact_def = 1, recast_def = 1, RV = 0, both = 0) {
  if (RV == 0) {
    dat <- dat[dat$outcome!=4,]
  }
  st <- dat$rstatus
  st2 <- dat$y2rstatus
  out <- data.frame(cid = dat$ConflictId, pid = dat$pid, event = dat$event, year = dat$Year,
                     t_time = dat$t_time, country = dat$cnum, cumintense= dat$CumulativeIntensity,
                     Victory = as.numeric(dat$outcome %in% c(3, 4)), Agreement = as.numeric(dat$outcome == 1),
                     PKO = as.numeric(dat$PKO_TC == 1 | dat$UNPKO_HW == 1 | dat$NUNPKO_HW == 1),
                     Revolution = ifelse(dat$Incompatibility == 2, 1, 0),
                     ln_Deaths = log(dat$deaths), ln_Rel_Deaths = dat$deaths/dat$pop_g,
                     Duration = dat$cdur, ln_Duration = log(dat$cdur),
                     Num_Actor_s = dat$numact, Num_Actor = dat$y2numact,
                     ln_GDPpc = log(dat$rgdppc_g), GDPpc = dat$rgdppc_g,
                     ln_Pop = log(dat$pop_g), Growth = dat$growth_g,
                     Gini = dat$gini, LDG = dat$LDG, PHI = dat$PHI, NHI = dat$NHI,
                     Res_Conf = dat$res_confl, Res_Conf_Dist = dat$distribution, Res_Conf_Fin = dat$finance,
                     Res_Conf_Agg = dat$aggrav, Instab = dat$instab,
                     Fragmentation = dat$fragm, Fragm_same = dat$fragm_same,
                     Democ = as.numeric(dat$p4 >= 5), Autoc = as.numeric(dat$p4 <= -5), Polity = dat$p4 + 10,
                     ELF = dat$ef, ELF2 = dat$ef^2, Mountain = dat$mountain, IMR = dat$imr,
                     outcome = dat$outcome, Conf_Same = dat$conf_same, Spl_Rec = dat$spl_rec,
                     Post_CW = ifelse(dat$Year >= 1989, 1, 0),
                     Conf_Neighb_As = dat$conf_neighb_as, Conf_Neighb = dat$conf_neighb_nc,
                     Rent = dat$rent, ln_Rent = log(dat$rent + 1))
  
  if (preact_def == 1 & recast_def == 1) {
    out$status = dat$event * ifelse(st == 0, 0, ifelse(st == 3, 2, 1))
    out$state = ifelse(st == 0, 0, ifelse(st == 3, 2, 1))
  } else if (preact_def == 1 & recast_def == 2) {
    out$status = dat$event * ifelse(st == 0, 0, ifelse(st == 1, 1, 2))
    out$state = ifelse(st == 0, 0, ifelse(st == 1, 1, 2))
  } else if (recast_def == 1) {
    out$status = dat$event * ifelse(st2 == 0, 0, ifelse(st2 == 3, 2, 1))
    out$state = ifelse(st2 == 0, 0, ifelse(st2 == 3, 2, 1))
  } else {
    out$status = dat$event * ifelse(st2 == 0, 0, ifelse(st2 == 1, 1, 2))
    out$state = ifelse(st2 == 0, 0, ifelse(st2 == 1, 1, 2))
  }
  
  if(both == 1){
    out$status_both = as.numeric(dat$event) * st2
    out$state_both = st2
  }
  

  lagl <- split(out, out$pid)
  laggedlist <- lapply(lagl, lagging)
  outdata <- data.frame()
  for (j in 1:length(laggedlist)) {
    outdata <- rbind(outdata, laggedlist[[j]])
  }

  return(outdata)
}

## a function generating dataframes for subdistributional competing risk models

subdist_admincens <- function(data, id = "pid", year = "year", t_time = "t_time", state = "state", failcode, cens_time, event = "event") {
  
  cutdat <- data[data[,which(names(data) == year)] <= cens_time,]
  ## the dataframe is splitted into a list by the unit of analysis.
  spl <- split(cutdat, cutdat[,which(names(cutdat) == id)])
  
  output <- data.frame()
  for (i in 1:length(spl)) {
    this <- spl[[i]]
    this <- this[order(this[,which(names(this) == year)]), ]
    ## is there any failure in the unit ?
    if (sum(this[,which(names(this) == event)]) != 0) {
      if (this[1, which(names(this) == state)] != failcode) {
        out <- this[-nrow(this),]
        last <- out[nrow(out),]
        replast <- data.frame()
        for (j in last[,which(names(this) == year)]:cens_time) {
          thistime <- last
          thistime[,which(names(this) == year)] <- j
          thistime[,which(names(this) == t_time)] <- j - (last[,names(this) == year] - last[,names(this) == t_time])
          replast <- rbind(replast, thistime)
        }
        out <- rbind(out, replast[-1,])
      } else {
        out <- this
      }
    } else {
      out <- this
    }
    output <- rbind(output, out)
  }
  
  return(output)
}

### A function to implement cause-specific and subdistribution hazard models
##
competing_risks <- function(x, indep, c_time = 2015, method = "efron", subest = T) {
  pool <- Surv(x$t_time-1, x$t_time, x$status != 0)
  cox <- coxph(formula = formula(paste("pool ~ ", indep, sep = "")), x, ties = method)
  
  if (subest == T){
    subdis_frame_1 <- subdist_admincens(x, failcode = 1, cens_time = c_time)
    subdis_frame_2 <- subdist_admincens(x, failcode = 2, cens_time = c_time)
  
    sub_repeat <- Surv(subdis_frame_1$t_time-1, subdis_frame_1$t_time, subdis_frame_1$status != 0)
    sub_recast <- Surv(subdis_frame_2$t_time-1, subdis_frame_2$t_time, subdis_frame_2$status != 0)
  
    subdis_res_1 <- coxph(formula = formula(paste("sub_repeat ~ ", indep, sep = "")), subdis_frame_1, ties = method)
    subdis_res_2 <- coxph(formula = formula(paste("sub_recast ~ ", indep, sep = "")), subdis_frame_2, ties = method)
  }
  
  c_repeat <- Surv(x$t_time-1, x$t_time, x$status == 1)
  c_recast <- Surv(x$t_time-1, x$t_time, x$status == 2)
  cause_res_1 <- coxph(formula = formula(paste("c_repeat ~ ", indep, sep = "")), x, ties = method)
  cause_res_2 <- coxph(formula = formula(paste("c_recast ~ ", indep, sep = "")), x, ties = method)
  
  if (subest == T){
      list(cox, subdis_res_1, subdis_res_2, cause_res_1, cause_res_2)
  } else {
      list(cox, NA, NA, cause_res_1, cause_res_2)
  }
}

## A function that returns a DataFrame that is used in regression estimations.

model_extract <- function(dat, formula) {
  string_formula <- gsub(pattern = "+ ", "", gsub("+\n", "", formula, fixed = T), fixed = T)
  vars <- strsplit(string_formula, " ")[[1]]
  indep_data <- dat[, vars]
  complete_rows <- complete.cases(indep_data)
  return(dat[complete_rows,])
}

## A function that returns a table of the number of events
freq_tab <- function(data){
  freq <- table(data$status)[2:3]
  freq <- c(freq, sum(freq))
  names(freq) <- c("Repeated", "Recast", "Total")
  return(freq)
}

## A function to make a formula string for BMA analysis
makeform <- function(x) {
  outform <- paste(x[1], "")
  for (term in x[-1]) {
    outform <- paste(outform, "+", term)
  }
  outform <- paste(outform, "+ cluster(cid)")
  return(outform)
}